##### Script for merging all the different dataframes together to create the final three dataframes
### Output: dataframe wide, dataframe long, dataframe without conjoint
## Jonne Kamphorst May




library(tidyverse)


#### Do long ####
# Combine the two long-format parts into one data frame and remove the
# columns listed in `drop`.
final_data_long <- readRDS("data/final_data_long.rds")
final_data_long_p2 <- readRDS("data/final_data_part2_long.rds")

# Store the date columns of part 1 as character before joining.
# NOTE(review): presumably part 2 already holds these columns as character and
# the join needs matching types -- confirm against the part 2 data.
final_data_long$StartDate <- as.character(final_data_long$StartDate)
# EndDate is converted from EndDate itself; it was previously overwritten with
# StartDate, which matters because the join below matches on shared columns.
final_data_long$EndDate <- as.character(final_data_long$EndDate)
final_data_long$RecordedDate <- as.character(final_data_long$RecordedDate)

# No `by` is supplied, so the join key is every column the two parts share.
final_data_long_all <- full_join(final_data_long, final_data_long_p2)


# Clean the data
# Columns to remove from the merged data. The same vector is reused for the
# wide data further down, so its name must stay `drop`.
# NOTE(review): "wecolme.message" looks misspelled; presumably it matches the
# column name in the raw data -- verify.
drop <- c(
  "Duration (in seconds)", "RecordedDate", "EndDate",
  "QPT4.1_First Click", "QPT4.1_Last Click",
  "QPT4.1_Page Submit", "QPT4.1_Click Count",
  "QPT4.6_First Click", "QPT4.6_Last Click",
  "QPT4.6_Page Submit", "QPT4.6_Click Count",
  "QPT4.9_First Click", "QPT4.9_Last Click",
  "QPT4.9_Page Submit", "QPT4.9_Click Count",
  "QPT4.22_First Click", "QPT4.22_Last Click",
  "QPT4.22_Page Submit", "QPT4.22_Click Count",
  "QV1_wrong", "QV2_wrong", "QV3_wrong",
  "QV4_wrong", "QV5_wrong", "QV6_wrong",
  "ï...", "user.id", "questions.load.time", "end.time", "status",
  "function.", "language", "credit.language", "wecolme.message",
  "completion.message", "date_wyear_wtime", "matching_time", "duplicated",
  "Q3.1_First.Click", "Q3.1_Last.Click",
  "Q3.1_Page.Submit", "Q3.1_Click.Count",
  "Q8.1_First.Click", "Q8.1_Last.Click",
  "Q8.1_Page.Submit", "Q8.1_Click.Count",
  "Q9.1_First.Click", "Q9.1_Last.Click",
  "Q9.1_Page.Submit", "Q9.1_Click.Count",
  "Duration..in.seconds.",
  "QPT4.1_First.Click", "QPT4.1_Last.Click",
  "QPT4.1_Page.Submit", "QPT4.1_Click.Count", "QPT5.1_Click.Count",
  "Status", "IPAddress", "Progress", "ResponseId",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail",
  "ExternalReference", "LocationLatitude", "LocationLongitude",
  "DistributionChannel", "UserLanguage"
)

# all_of() makes it explicit that `drop` is an external character vector and
# not a column of the data; it still errors if a listed column is missing.
final_data_long_all <- final_data_long_all %>%
  select(-all_of(drop))
#write_rds(final_data_long_all, "data/final_data_long_all.rds")






#### Do wide ####
# Combine the two wide-format parts into one data frame and remove the
# columns listed in `drop` (the vector defined in the long section above).
final_data_wide <- readRDS("data/final_data_wide.rds")
final_data_wide_p2 <- readRDS("data/final_data_part2_wide.rds")

# Store the date columns of part 1 as character before joining.
# NOTE(review): presumably part 2 already holds these columns as character and
# the join needs matching types -- confirm against the part 2 data.
final_data_wide$StartDate <- as.character(final_data_wide$StartDate)
# EndDate is converted from EndDate itself; it was previously overwritten with
# StartDate, which matters because the join below matches on shared columns.
final_data_wide$EndDate <- as.character(final_data_wide$EndDate)
final_data_wide$RecordedDate <- as.character(final_data_wide$RecordedDate)

# No `by` is supplied, so the join key is every column the two parts share.
final_data_wide_all <- full_join(final_data_wide, final_data_wide_p2)

# Clean the data
# all_of() makes it explicit that `drop` is an external character vector and
# not a column of the data; it still errors if a listed column is missing.
final_data_wide_all <- final_data_wide_all %>%
  select(-all_of(drop))
#write_rds(final_data_wide_all, "data/final_data_wide_all.rds")






#### Do normal (Delete duplicates from wide) ####
# Keep the first row for each `rid` in the wide data, retaining all columns.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
final_data_noncj <- final_data_wide_all %>%
  distinct(rid, .keep_all = TRUE)

# Clean data
# Columns removed from the de-duplicated data.
# NOTE(review): these look like the conjoint-task columns -- confirm.
drop_noncj <- c(
  "task", "profile", "selected", "position", "clarity", "Partij",
  "EU", "wage", "farms", "immi", "climate", "load.time", "date",
  "start.time", "Finished"
)

# all_of() makes it explicit that `drop_noncj` is an external character vector
# and not a column of the data; it still errors if a listed column is missing.
final_data_noncj <- final_data_noncj %>%
  select(-all_of(drop_noncj))
#write_rds(final_data_noncj, "data/final_data_noncj.rds")




